***************************************************************************
*** HH characteristics - merging all characteristics for balance tests  ***
***************************************************************************

	*** reserve temporary dataset names used to pass intermediate files between steps ***
		* person-level Section 1 file with household composition variables
		tempfile r1s1table
		* person-level education indicators built from Section 2
		tempfile r1s2table
		* one row per mother identified in Section 24
		tempfile r1s24table
		* Section 1 merged with the mother flag and co-wife indicator
		tempfile r1s1

*********************************
********** BASELINE *************
*********************************

* open baseline *

	use "$data2010/R1_Section_1.dta", clear

	* Generate Adult Gender Variables
	* A member is flagged as an adult when s1_q4a>=15 and s1_q4b==0; s1_q2==1 is
	* treated as male and s1_q2==0 as female. The indicators are defined (0/1)
	* only when s1_q2, s1_q4a and s1_q4b are all observed.
	* NOTE(review): s1_q4a looks like the age and s1_q4b the age unit
	* (0 = years) - confirm against the questionnaire.
	*
	* Stata treats missing values as larger than any number, so "s1_q4a>=15" is
	* true for a missing age. The explicit !missing(s1_q4a) keeps members with
	* an unknown age from being counted as adults.

	gen adultmale=0 if !missing(s1_q2, s1_q4a, s1_q4b)
	replace adultmale=1 if s1_q2==1 & s1_q4a>=15 & !missing(s1_q4a) & s1_q4b==0

	gen adultfemale=0 if !missing(s1_q2, s1_q4a, s1_q4b)
	replace adultfemale=1 if s1_q2==0 & s1_q4a>=15 & !missing(s1_q4a) & s1_q4b==0

	* household totals, repeated on every member row
	* (total() ignores missing indicators, i.e. treats them as 0)
	sort hhid idp
	by hhid: egen nadultmale=total(adultmale)
	by hhid: egen nadultfemale=total(adultfemale)

	drop adultmale adultfemale

	* Generate Child Gender Variables
	* A member is flagged as a child when (s1_q4a<6 and s1_q4b==0) or s1_q4b==1;
	* s1_q2==1 is treated as male and s1_q2==0 as female.
	* NOTE(review): s1_q4b==1 presumably means the age was recorded in months -
	* confirm against the questionnaire.
	*
	* Both indicators are built in a single step and only for members with
	* s1_q2, s1_q4a and s1_q4b all observed. Previously a member with
	* s1_q4b==1 but a missing s1_q4a was set to 1 for one sex while the other
	* sex stayed missing, so the member was counted in nchildmale/nchildfemale
	* but dropped from nchildren (missing + 1 = missing). With a common
	* complete-case rule, nchildren always equals nchildmale + nchildfemale.

	gen childmale=(s1_q2==1 & ((s1_q4a<6 & s1_q4b==0) | s1_q4b==1)) if !missing(s1_q2, s1_q4a, s1_q4b)

	gen childfemale=(s1_q2==0 & ((s1_q4a<6 & s1_q4b==0) | s1_q4b==1)) if !missing(s1_q2, s1_q4a, s1_q4b)

	* household totals, repeated on every member row
	sort hhid idp
	by hhid: egen nchildmale=total(childmale)
	by hhid: egen nchildfemale=total(childfemale)
	by hhid: egen nchildren=total(childmale+childfemale)

	drop childmale childfemale
		
	* Polygynous HH *
	* poly flags a member with s1_q2==0, s1_q4a>=15, s1_q4b==0 and a relationship
	* code s1_q3 of 3, 4 or 5; it is 0/1 only when s1_q2, s1_q4a and s1_q4b are
	* all observed.
	* NOTE(review): codes 3/4/5 of s1_q3 presumably identify wives of the
	* household head - confirm against the questionnaire.
	* !missing(s1_q4a) is required because a missing age satisfies "s1_q4a>=15"
	* in Stata (missing sorts above every number).
	gen poly=0 if !missing(s1_q2, s1_q4a, s1_q4b)
	replace poly=1 if s1_q2==0 & s1_q4a>=15 & !missing(s1_q4a) & s1_q4b==0 & inlist(s1_q3,3,4,5)
	tab poly, m

	* household-level flag: 1 if any member is flagged
	* (bysort so the step does not depend on an earlier sort)
	bysort hhid (idp): egen wives=max(poly)
	tab wives, m
	
	* Female-headed household
	* fh is defined only when both s1_q3 and s1_q2 are observed: 1 for a member
	* with s1_q3==1 and s1_q2==0, 0 for everyone else.
	generate fh = (s1_q3==1 & s1_q2==0) if !missing(s1_q3) & !missing(s1_q2)
	tabulate fh, missing

	* household-level flag: 1 when any member of the household has fh==1
	by hhid: egen femalehead = max(fh)
	tabulate femalehead, missing

	* keep this person-level file for the later merge with the Section 2 variables
	save `r1s1table', replace
	
* merge in Section 24 to Section 1 to identify the moms *
	* Section 24: keep one row per (hhid, idp_mother) and flag that person as a mom *
	* NOTE(review): rows with a missing idp_mother survive as one row per
	* household and later add to num_moms - confirm this is intended.
	use "$data2010/R1_Section_24", clear
		duplicates drop hhid idp_mother, force
		sort hhid idp_mother
		rename idp_mother idp
		gen mom=1
	save `r1s24table', replace

	* "clear" is the documented option of -use- (the original passed "replace")
	use "$data2010/R1_Section_1", clear
	* no keep() restriction: Section 24 mothers without a Section 1 row stay
	* in the data and are counted in num_moms
	merge 1:1 hhid idp using `r1s24table', keepusing(mom)
	drop _merge

	* Polygynous HH *
	* same definition as used for the household table; !missing(s1_q4a) is
	* needed because a missing age satisfies "s1_q4a>=15" in Stata
	gen poly=0 if !missing(s1_q2, s1_q4a, s1_q4b)
	replace poly=1 if s1_q2==0 & s1_q4a>=15 & !missing(s1_q4a) & s1_q4b==0 & inlist(s1_q3,3,4,5)
	tab poly, m

	* bysort: the sort order after -merge- is not guaranteed
	bysort hhid (idp): egen wives=max(poly)
	tab wives, m

	* calculate number of hh with cowives in the program *
	by hhid: egen num_moms=total(mom)
	tab num_moms

	* wives_prog = 1 when the household has more than one mom and is flagged
	* polygynous, 0 otherwise; missing when wives is missing
	* (num_moms is never missing because total() returns 0 for all-missing groups)
	gen wives_prog=(num_moms>1 & wives==1) if !missing(wives)
	tab wives_prog

	* plain -save-: the tempfile is only read back within this session, so the
	* older file format written by -saveold- is not needed
	save `r1s1', replace
	
* open baseline Section 2 *
	* forward slash keeps the path portable and consistent with the other -use- calls
	use "$data2010/R1_Section_2", clear

	sort hhid idp

		* merge in relation to hh head and moms; only matched persons are kept *
		merge 1:1 hhid idp using `r1s1', keepusing(s1_q3 mom wives_prog) nogenerate keep(match)

	* generate hh head education *
	* Indicators are defined for members with s1_q3==1 and split s2_q6 at 7.
	* !missing() is used instead of "!=." so extended missing values (.a-.z),
	* which also compare greater than 7, are not classified as secondary.
	* NOTE(review): a head with missing s2_q6 gets 0 on both indicators rather
	* than missing - confirm this is intended.
	gen heducpr=0 if s1_q3==1
	replace heducpr=1 if s1_q3==1 & s2_q6<7 & !missing(s2_q6)

	gen heducsec=0 if s1_q3==1
	replace heducsec=1 if s1_q3==1 & s2_q6>=7 & !missing(s2_q6)

	* generate education of mothers (same split, for members with mom==1) *
	gen educpr=0 if mom==1
	replace educpr=1 if mom==1 & s2_q6<7 & !missing(s2_q6)

	gen educsec=0 if mom==1
	replace educsec=1 if mom==1 & s2_q6>=7 & !missing(s2_q6)

	* keep only new vars (wives_prog is dropped here and not carried forward) *
	keep hhid idp heducpr heducsec educpr educsec

	* saving *
	sort hhid idp
	save `r1s2table', replace
	
* merge section 1 and section 2 for table
	* person-level merge of the composition variables with the education indicators
	use `r1s1table', clear
	sort hhid idp
	merge 1:1 hhid idp using `r1s2table'
	* _m==2 is 0, so keeping everything that was in section 1
	drop _merge

* drop unnecessary vars *
	*drop newHH twins s1_q5 s1_q6 s1_q7 s1_q8 idp1 s1_q9 s1_q10_1 s1_q11_1 s1_q12_1 idp2 s1_q10_2 s1_q11_2 s1_q12_2 idp3 s1_q10_3 s1_q11_3 s1_q12_3

	* hh_size was listed twice in the original varlist; each variable is named once here
	order hhid idp treatment hh_size nadultmale nadultfemale femalehead nchildren heducpr heducsec educpr educsec

* collapse *
	* one row per household (and treatment value); every variable becomes the
	* mean over the household's member rows
	collapse (mean) hh_size nadultmale nadultfemale wives femalehead nchildren heducpr heducsec educpr educsec, by(hhid treatment)
	
	* variable labels for the household-level file
	* household composition
	label variable hh_size "HH size (residents present at least 6 months)"
	label variable nadultmale "Number of Adult Males"
	label variable nadultfemale "Number of Adult Females"
	* NOTE(review): nchildren is built from s1_q4a<6 with s1_q4b==0, or s1_q4b==1;
	* the "under 6 mo" wording may not match that rule - confirm before publishing
	label variable nchildren "Number of Children under 6 mo"
	label variable wives "Polygamous household"
	label variable femalehead "Female Headed HH"

	* education
	label variable heducpr "Education of HH Head (% completed): Primary"
	label variable heducsec "Education of HH Head (% completed): Secondary"
	label variable educpr "Education of Mothers (% completed): Primary"
	label variable educsec "Education of Mothers (% completed): Secondary"

	* After the collapse the education variables are household means of 0/1
	* indicators, so ceil() turns any positive share into 1 before the two
	* levels are added together.
	generate anyeduc_head = ceil(heducpr) + ceil(heducsec)
	generate anyeduc_mother = ceil(educpr) + ceil(educsec)

	label variable anyeduc_head "HH head has any primary or secondary education"
	label variable anyeduc_mother "Mother has any primary or secondary education"

	* store the household file, ordered by hhid
	sort hhid
	save "$do/hh", replace

**************************
*** HH characteristics ***
**************************

	* Section 4: keep the two housing variables and rename them floor / roof
	use "$data2010/R1_Section_4.dta", clear
		keep hhid s4_q1_1 s4_q2_1
		rename s4_q1_1 floor
		rename s4_q2_1 roof

	* Attach the household file saved earlier in this do-file.
	* merge 1:1 replaces the deprecated old-style "merge hhid using": it needs
	* no pre-sorting and stops with an error if hhid is not unique on either
	* side, instead of silently pairing rows in file order.
	merge 1:1 hhid using "$do/hh"
	tab _merge
	drop _merge
	sort hhid
	save "$do/hh", replace

************************************
*** HH asset holdings by gender ***
***********************************

* open baseline *

	* forward slash keeps the path portable and consistent with the other -use- calls
	use "$data2010/R1_Section_5.dta", clear

	sort hhid asset

	* corrections *
		* reported quantities, but said they don't own category *
		* !missing() instead of "!=." so extended missing values (.a-.z), which
		* compare greater than 0, do not switch the ownership flag on
		replace s5_q1=1 if s5_q2>0 & !missing(s5_q2)
		replace s5_q4=1 if s5_q5>0 & !missing(s5_q5)

		* system-missing quantities and values are treated as zero
		recode s5_q2 s5_q5 s5_q3 s5_q6 (.=0)

	* dropping outliers (+/- 3 SD from mean), LB Nov 4, 2013*
	* NOTE(review): outlier2-outlier5 are not created in this file; they are
	* assumed to already exist in R1_Section_5.dta
	forvalues i=2/5 {
		drop if outlier`i'==1
	}
	
	* create categories of assets *
	* start from missing and fill in the three groups; the code lists do not
	* overlap, so any asset code outside 1-49 simply stays missing
	generate asset_group = .
	* Other
	replace asset_group = 3 if inlist(asset,43,44,45,46,47,48,49)
	* Agricultural Capital
	replace asset_group = 2 if inlist(asset,33,34,35,36,37,38,39,40,41,42)
	* Durables
	replace asset_group = 1 if inlist(asset,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16) ///
		| inlist(asset,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)
	
	* create asset group sums *
	* source column for each owner x measure combination
	local src_men_count   s5_q2
	local src_men_value   s5_q3
	local src_women_count s5_q5
	local src_women_value s5_q6

	* totals within household x asset group, by owner
	foreach who in men women {
		foreach what in count value {
			bysort hhid asset_group: egen `who'_`what' = total(`src_`who'_`what'')
		}
	}

	* household figure within household x asset group = men + women
	foreach what in count value {
		bysort hhid asset_group: egen hh_`what' = mean(men_`what' + women_`what')
	}

	* create collapsing vars *
	* group-specific columns: filled only on rows of that asset group
	* (1 = durables, 2 = agricultural capital), missing elsewhere
	foreach who in men women hh {
		foreach what in count value {
			generate `who'_durables_`what' = `who'_`what' if asset_group == 1
			generate `who'_ag_cap_`what' = `who'_`what' if asset_group == 2
		}
	}

	* household-wide totals by owner, over every asset row of the household
	* (including rows whose asset_group is 3 or missing)
	foreach who in men women {
		foreach what in count value {
			by hhid: egen `who'_asset_`what' = total(`src_`who'_`what'')
		}
	}

	* household-wide totals for both owners combined
	generate hh_asset_count = men_asset_count + women_asset_count
	generate hh_asset_value = women_asset_value + men_asset_value

		
	* collapse in order of table *
	* one row per household (and treatment value); each variable is averaged
	* over the household's asset rows
	collapse (mean) ///
		men_durables_count women_durables_count hh_durables_count ///
		men_ag_cap_count women_ag_cap_count hh_ag_cap_count ///
		men_asset_count women_asset_count hh_asset_count ///
		men_durables_value women_durables_value hh_durables_value ///
		men_ag_cap_value women_ag_cap_value hh_ag_cap_value ///
		men_asset_value women_asset_value hh_asset_value, ///
		by(hhid treatment)

	* label asset group after collapsing

	* durables
	label variable men_durables_count "Durables count - men"
	label variable women_durables_count "Durables count - women"
	label variable hh_durables_count "Durables count - HH"
	label variable men_durables_value "Durables value - men"
	label variable women_durables_value "Durables value - women"
	label variable hh_durables_value "Durables values - HH"

	* agricultural capital
	label variable men_ag_cap_count "Ag capital count - men"
	label variable women_ag_cap_count "Ag capital count - women"
	label variable hh_ag_cap_count "Ag capital count - HH"
	label variable men_ag_cap_value "Ag capital value - men"
	label variable women_ag_cap_value "Ag capital value - women"
	label variable hh_ag_cap_value "Ag capital value - HH"

	* all assets
	label variable men_asset_count "Asset count - men"
	label variable women_asset_count "Asset count - women"
	label variable hh_asset_count "Asset count - HH"
	label variable men_asset_value "Asset value - men"
	label variable women_asset_value "Asset value - women"
	label variable hh_asset_value "Asset value - HH"

	label variable treatment "Treatment arm"

	* only the total asset values by owner are carried into the household file
	keep hhid treatment men_asset_value women_asset_value
	
* Attach the asset values to the household file saved earlier in this do-file.
* merge 1:1 replaces the deprecated old-style "merge hhid using": it needs no
* pre-sorting and stops with an error if hhid is not unique on either side,
* instead of silently pairing rows in file order.
* treatment exists in both files; the value from the data in memory is kept
* for matched households.
merge 1:1 hhid using "$do/hh"
tab _merge
drop _merge

* keep the saved file ordered by hhid
sort hhid
save "$do/hh", replace
